library(data.table)
library(nnet)

# Collapse repeated participant IDs to a single row each.
#
# For every ID, only the row with the highest progress value is kept; when
# several rows tie for the highest value, the earliest one wins. Rows whose ID
# is unique are therefore returned untouched, and the original row order is
# preserved. Rows with a missing ID are never treated as duplicates of one
# another and are all kept.
#
# Arguments:
#   dt            table holding one survey wave
#   id_col        name of the participant ID column
#   progress_col  name of the progress column; must not contain NA
#
# Returns `dt` restricted to the retained rows.
keep_most_complete <- function(dt, id_col, progress_col) {
  id <- dt[[id_col]]
  progress <- dt[[progress_col]]
  # Callers drop rows with missing progress first; fail loudly if not.
  stopifnot(!anyNA(progress))

  # Highest progress within each ID, aligned row by row with `progress`.
  # ave() leaves rows whose ID is NA unchanged, so those rows compare equal
  # to themselves on the next line and stay candidates.
  best <- ave(progress, id, FUN = max)
  candidates <- which(progress == best)

  # `candidates` is in ascending row order, so !duplicated() selects the
  # earliest top-scoring row of each ID.
  candidate_ids <- id[candidates]
  keep <- candidates[!duplicated(candidate_ids) | is.na(candidate_ids)]
  dt[keep, ]
}

# Wave 1 ----
wave1 <- fread("Data/MediaSSI_Dec2017_w1_recoded.csv")
# Suffix every column so the waves stay distinguishable after merging.
colnames(wave1) <- paste0(colnames(wave1), "w1")

wave1 <- wave1[!is.na(wave1$Progressw1), ]
# Consent is filtered before de-duplication so that the retained row of a
# repeated ID is always a consenting one.
wave1 <- wave1[wave1$consentw1 == 1, ]
wave1 <- keep_most_complete(wave1, "PIDw1", "Progressw1")

# Sanity check: TRUE when every remaining row has a distinct ID.
length(unique(wave1$PIDw1)) == nrow(wave1)


# Wave 2 ----
wave2 <- fread("Data/MediaSSI_Dec2017_w2_recoded.csv")
colnames(wave2) <- paste0(colnames(wave2), "w2")
wave2 <- wave2[!is.na(wave2$Progressw2), ]
wave2 <- keep_most_complete(wave2, "PIDw2", "Progressw2")

length(unique(wave2$PIDw2)) == nrow(wave2)


# Wave 3 ----
wave3 <- fread("Data/MediaSSI_Dec2017_w3_recoded.csv")
colnames(wave3) <- paste0(colnames(wave3), "w3")
wave3 <- wave3[!is.na(wave3$Progressw3), ]
wave3 <- keep_most_complete(wave3, "PIDw3", "Progressw3")

length(unique(wave3$PIDw3)) == nrow(wave3)


# Combine waves ----
# Full outer joins on participant ID: anyone present in at least one wave is
# kept, and the key column of the result is named PIDw1.
df <- merge(wave1, wave2, by.x = "PIDw1", by.y = "PIDw2",
            all.x = TRUE, all.y = TRUE)
df <- merge(df, wave3, by.x = "PIDw1", by.y = "PIDw3",
            all.x = TRUE, all.y = TRUE)

# Make 'MSNBC' the reference level; relevel() stops with an error if that
# level does not occur in the column.
df$article_readw1 <- relevel(factor(df$article_readw1), ref = "MSNBC")
df$article_readw2 <- relevel(factor(df$article_readw2), ref = "MSNBC")

